PCA plots
This section uses the counts data generated in Section 1 to perform PCA analyses and plot them.
Prerequisites
Packages required for this analyses are loaded as follows:
setwd("~/github/amnion.vs.other_RNASeq")
library(sva)
library(tidyverse)
library(DESeq2)
library(vsn)
library(pheatmap)
library(ggrepel)
library(RColorBrewer)
library(plotly)Import datasets
The counts data and its associated metadata (coldata) are imported for analyses
counts = 'assets/new-counts.tsv'
groupFile = 'assets/new-batch.v2.tsv'
coldata <- read.csv(groupFile, row.names=1, sep="\t", stringsAsFactors = TRUE)
cts <- as.matrix(read.csv(counts,sep="\t",row.names="gene.ids"))Inspect the coldata
DT::datatable(coldata)order and inspect if all the coldata matches the headers of the counts data.
colnames(cts)
#> [1] "BT_EVT_Okae.1" "BT_SCT_Okae.1" "BT_TSC_Okae.1" "BT_EVT_Okae.2"
#> [5] "BT_SCT_Okae.2" "BT_TSC_Okae.2" "CT_EVT_Okae.1" "CT_SCT_Okae.1"
#> [9] "CT_TSC_Okae.1" "CT_EVT_Okae.2" "CT_SCT_Okae.2" "CT_TSC_Okae.2"
#> [13] "CT_EVT_Okae.3" "CT_SCT_Okae.3" "CT_TSC_Okae.3" "n_H9.1"
#> [17] "n_H9.2" "nTE_D1.1" "nTE_D1.2" "nTE_D2.1"
#> [21] "nTE_D2.2" "nTE_D3.1" "nTE_D3.2" "nCT_P3.1"
#> [25] "nCT_P3.2" "nCT_P10.1" "nCT_P10.2" "nCT_P15.1"
#> [29] "nCT_P15.2" "cR_nCT_P15.1" "cR_nCT_P15.2" "nCT_P15_iPSC.1"
#> [33] "nCT_P15_iPSC.2" "CT_Okae.1" "CT_Okae.2" "nST.1"
#> [37] "nST.2" "nEVT.1" "nEVT.2" "pH9.1"
#> [41] "pH9.2" "pBAP_D1.1" "pBAP_D1.2" "pBAP_D2.1"
#> [45] "pBAP_D2.2" "pBAP_D3.1" "pBAP_D3.2" "CT_7wk.1"
#> [49] "CT_7wk.2" "CT_9wk.1" "CT_11wk.1" "amnion_18w.1"
#> [53] "amnion_9w.1" "amnion_16w.1" "amnion_16w.2" "amnion_22w.1"
#> [57] "amnion_9w.2" "amnion_22w.2" "H1_gt70_D8_BAP.1" "H1_gt70_D8_BAP.2"
#> [61] "H1_gt70_D8_BAP.3" "H1_lt40_D8_BAP.1" "H1_lt40_D8_BAP.2" "H1_lt40_D8_BAP.3"
#> [65] "H1_Yabe.1" "H1_Yabe.2" "H1_Yabe.3" "amnion_Term.1"
#> [69] "amnion_Term.2" "amnion_Term.3" "amnion_Term.4" "amnion_Term.5"
#> [73] "amnion_Term.6" "amnion_Term.7" "amnion_Term.8" "amnion_Term.9"
#> [77] "amnion_Term.10" "amnion_Term.11" "amnion_Term.12" "H9_Shao.1"
#> [81] "H9_Shao.2" "H9_Shao.3" "H9_amnion.1" "H9_amnion.2"
#> [85] "H9_amnion.3"
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]cov1 <- as.factor(coldata$authors)
adjusted_counts <- ComBat_seq(cts, batch=cov1, group=NULL)
#> Found 6 batches
#> Using null model in ComBat-seq.
#> Adjusting for 0 covariate(s) or covariate level(s)
#> Estimating dispersions
#> Fitting the GLM model
#> Shrinkage off - using GLM estimates for parameters
#> Adjusting the data
all(rownames(coldata) %in% colnames(cts))
#> [1] TRUE
cts <- cts[, rownames(coldata)]dds <- DESeqDataSetFromMatrix(countData = adjusted_counts,
colData = coldata,
design = ~ group)vsd <- vst(dds, blind=FALSE)
pcaData <- plotPCA(vsd, intgroup=c("group", "authors"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))ggplotly(ggplot(pcaData, aes(PC1, PC2, color=group.1, shape=authors)) +
scale_shape_manual(values=1:8) +
theme_bw() +
theme(legend.title = element_blank(), legend.position = "none") +
geom_point(size=1, stroke = 1) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")))FigX: PCA plots (all samples)
Plot differenciated cells only
setwd("~/github/amnion.vs.other_RNASeq")
counts2 = 'assets/counts-dotted.tsv'
groupFile = 'assets/batch-dotted.v2.tsv'
coldata2 <- read.csv(groupFile, row.names=1, sep="\t", stringsAsFactors = TRUE)
cts2 <- as.matrix(read.csv(counts2,sep="\t",row.names="gene.ids"))
all(rownames(coldata2) %in% colnames(cts2))
#> [1] TRUE
cts2 <- cts2[, rownames(coldata2)]Inspect the coldata
DT::datatable(coldata2)cov1 <- as.factor(coldata2$authors)
adjusted_counts <- ComBat_seq(cts2, batch=cov1, group=NULL)
#> Found 3 batches
#> Using null model in ComBat-seq.
#> Adjusting for 0 covariate(s) or covariate level(s)
#> Estimating dispersions
#> Fitting the GLM model
#> Shrinkage off - using GLM estimates for parameters
#> Adjusting the data
all(rownames(coldata2) %in% colnames(cts2))
#> [1] TRUE
cts2 <- cts2[, rownames(coldata2)]dds <- DESeqDataSetFromMatrix(countData = adjusted_counts,
colData = coldata2,
design = ~ group)vsd <- vst(dds, blind=FALSE)
pcaData <- plotPCA(vsd, intgroup=c("group", "authors"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))ggplotly(ggplot(pcaData, aes(PC1, PC2, color=group.1, shape=authors)) +
scale_shape_manual(values=1:8) +
theme_bw() +
theme(legend.title = element_blank(), legend.position = "none") +
geom_point(size=1, stroke = 1) +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")))FigX: PCA plots (all samples)